#!/usr/bin/env python
# coding: utf-8
# # NumPy: creating and manipulating numerical data
# ## Section 1: What is NumPy and numpy arrays
#
# **Python** has built-in:
# - containers: lists (costless insertion and append), dictionaries (fast lookup)
# - high-level number objects (integers, floating points)
#
# **NumPy** is:
# - an extension package to Python for multidimensional arrays
# - faster (as you'll see below)
# - convenient and tested by scientific community
# In[1]:
import numpy as np
# In[2]:
# Build a 1-D array from a Python list; the bare name on the next line only
# displays the value when this runs as a notebook cell.
a = np.array([0,1,2,3])
a
# In[3]:
# Time squaring 1000 integers with a pure-Python list comprehension.
# NOTE(review): get_ipython() is presumably injected by IPython/Jupyter only —
# confirm before running this file as a plain script.
l = range(1000)
get_ipython().run_line_magic('timeit', '[i**2 for i in l]')
# In[4]:
# Time the same squaring written as one vectorized expression on an array,
# for comparison with the list comprehension above.
a = np.arange(1000)
get_ipython().run_line_magic('timeit', 'a**2')
# In[5]:
# The built-in help() prints the full docstring of any NumPy callable.
help(np.array)
# In[6]:
# np.lookfor searched NumPy docstrings by keyword, but it was removed from the
# main namespace in NumPy 2.0, so only call it when the installed NumPy still
# provides it; otherwise point the reader at the online documentation.
if hasattr(np, 'lookfor'):
    np.lookfor('create array')
else:
    print("np.lookfor is not available in this NumPy version; "
          "search https://numpy.org/doc/ for 'create array' instead.")
# In[7]:
# Same guard as above: help(np.lookfor) would raise AttributeError on NumPy >= 2.0.
if hasattr(np, 'lookfor'):
    help(np.lookfor)
# ## Creating Arrays
#
# ### 1-Dimensional
# In[8]:
# A flat Python list becomes a one-dimensional array.
a = np.array([0, 1, 2, 3])
a
# In[9]:
# ndim: how many axes the array has
a.ndim
# In[10]:
# shape: the length along each axis, as a tuple
a.shape
# In[11]:
# len() reports the size of the first axis
len(a)
# ### 2-D, 3-D and more
# In[12]:
# A list of equally long lists becomes a two-dimensional array (2 rows, 3 columns).
b = np.array([
    [0, 1, 2],
    [3, 4, 5],
])
b
# In[13]:
b.ndim
# In[14]:
b.shape
# In[15]:
# for a 2-D array len() is the number of rows
len(b)
# In[16]:
# One more level of nesting adds one more axis.
c = np.array([
    [[1], [2]],
    [[3], [4]],
])
c
# In[17]:
c.shape
# In[18]:
print("Number of dimensions in array c: ", c.ndim)
# ### Evenly spaced
# In[19]:
# np.arange(n) yields the integers 0 .. n-1: counting starts at 0, not at 1.
a = np.arange(10)
a
# ### or, number of points using linspace
# In[20]:
# linspace takes start, stop and the number of points; stop is included by default
c = np.linspace(0, 1, num=9)
c
# In[21]:
# endpoint=False leaves the stop value (1) out of the samples
d = np.linspace(0, 1, num=5, endpoint=False)
d
# ### Common arrays
# In[22]:
# The shape is passed as a tuple: 3 rows by 3 columns, filled with ones.
a = np.ones(shape=(3, 3))
a
# In[23]:
# every array, however it was created, is an ndarray
type(a)
# In[24]:
# Same idea, filled with zeros.
b = np.zeros((2, 2))
b
# In[25]:
# Identity matrix: ones on the main diagonal, zeros elsewhere.
c = np.eye(3)
c
# In[26]:
# Square matrix whose main diagonal holds the given values.
d = np.diag([1, 2, 3, 4])
d
# ### Random numbers
# In[27]:
# Four samples drawn uniformly from the half-open interval [0, 1).
e = np.random.rand(4)
e
# In[28]:
# Four samples from the standard normal (Gaussian) distribution.
f = np.random.randn(4)
f
# help(np.random.randn) explains the distribution in more detail
# In[29]:
# Fixing the seed makes the random draws that follow reproducible.
np.random.seed(seed=1234)
help(np.random.seed)
# In[ ]:
# ## Exercise 1
#
# **Create an array that looks like this:**
#
# $$x =
# \begin{bmatrix}
# 1 & 1 & 1 & 1 \\
# 1 & 1 & 1 & 1 \\
# 1 & 1 & 1 & 8 \\
# 1 & 6 & 1 & 1 \\
# \end{bmatrix}\tag{1}$$ and,
#
# **another one** that looks like this:
#
# $$y =
# \begin{bmatrix}
# 0. & 0. & 0. & 0. & 0.\\
# 7. & 0. & 0. & 0. & 0.\\
# 0. & 8. & 0. & 0. & 0.\\
# 0. & 0. & 9. & 0. & 0.\\
# 0. & 0. & 0. & 10. & 0.\\
# 0. & 0. & 0. & 0. & 11.\\
# \end{bmatrix}\tag{2}$$
#
# and lastly,
#
# **create** this simple array
#
# $$\begin{bmatrix}
# 0. & 0. & 0. & 0. & 0.\\
# 0. & 0. & 0. & 0. & 0.\\
# 1. & 0. & 0. & 0. & 0.\\
# 0. & 1. & 0. & 0. & 0.\\
# 0. & 0. & 1. & 0. & 0.\\
# 0. & 0. & 0. & 1. & 0.\\
# \end{bmatrix}\tag{3}$$
# In[30]:
# Both functions are useful for the exercise above; print their documentation.
help(np.eye)
help(np.diag)
# ### Basic Data Types
# In[31]:
# Integer literals give an integer dtype.
a = np.array([1, 2, 3])
a.dtype
# In[32]:
# Floating-point literals give a floating-point dtype.
b = np.array([1.0, 2.0, 3.0])
b.dtype
# ### Note: Datatype and performance
#
# Remember, different datatypes allow us to store data more compactly, but most of the time we simply let NumPy auto-detect the datatype from the input.
# In[33]:
# Request the dtype explicitly instead of letting NumPy infer it from the input.
c = np.array([1, 2, 3], dtype=np.float64)
c.dtype
# In[34]:
# Constructors such as np.ones default to floating point.
a = np.ones(shape=(3, 3))
a.dtype
# ### More datatypes
#
# **Complex**
#
# **Bool**
#
# **Strings**
#
# **Integers**
# In[35]:
# Complex numbers: the j suffix marks the imaginary part.
d = np.array([1 + 2j, 4 + 5j, 6 + 8j])
d.dtype
# In[36]:
# Booleans
e = np.array([True, False, False, True])
e.dtype
# In[37]:
# Strings: the dtype is a fixed-width unicode type sized to the longest entry
# (10 characters here).
f = np.array(['Bonjour', 'Hi', 'Hola', 'Ole', 'Namaste Ji'])
f.dtype
# ## Some Basic Visualization
# In[38]:
# pyplot provides the plotting calls used in the cells below.
import matplotlib.pyplot as plt
# NOTE(review): the matplotlib magic goes through get_ipython(), which is
# presumably available only under IPython/Jupyter — confirm before running
# this file as a plain script.
get_ipython().run_line_magic('matplotlib', 'inline')
# In[39]:
# 20 evenly spaced samples from 0 to 3 for x and from 0 to 9 for y.
x = np.linspace(0, 3, 20)
y = np.linspace(0, 9, 20)
plt.plot(x, y) # plots a line
# In[40]:
# Same data, with the 'o' format string to draw markers instead of a line.
plt.plot(x, y, 'o')
# ### 2D arrays
# In[41]:
# Display a 40x40 array of random values as an image using the Blues colormap,
# then attach a colorbar.
image = np.random.rand(40, 40)
plt.imshow(image, cmap=plt.cm.Blues)
plt.colorbar()
# In[42]:
# or, the same image with the hot colormap
plt.imshow(image, cmap=plt.cm.hot)
# ## Indexing and Slicing
#
# - In 2D, the first dimension corresponds to rows, the second to columns.
# - For a multidimensional array `a`, `a[0]` is interpreted by taking all elements in the unspecified dimensions.
# In[43]:
# Indexing begins at 0, unlike Fortran or MATLAB where it begins at 1.
a = np.arange(10)
a
# In[44]:
# For multi-dimensional arrays, an index is a tuple of integers (row, column).
a = np.diag(np.arange(3))
a
# In[45]:
# second row, second column
a[1, 1]
# In[46]:
# a single index selects the whole second row
a[1]
# In[47]:
# overwrite the element in the third row, second column
a[2, 1] = 10
# In[48]:
a
# In[49]:
# Slicing
a = np.arange(10)
a
# In[50]:
# the general form is [start:end:step]
a[2:9:3]
# In[51]:
# the end index itself is not included
a[:4]
# In[52]:
# none of the three slice components is mandatory
# ### An illustration of NumPy indexing and slicing
#
#
# In[53]:
# A column of tens (0, 10, ..., 50) broadcast against a row of units (0 .. 5)
# gives a 6x6 grid whose entry at (row, col) is 10 * row + col.
a = np.arange(0, 51, 10)[:, None] + np.arange(6)
print("Answer to above questions is: ")
a
# In[54]:
# first row, columns 3 and 4
print("Orange is: ", a[0, 3:5])
# Try the other regions in the same way and keep playing with this array
# In[55]:
# np.tile repeats its input; here the scalar 4 four times, placed on a diagonal
np.diag(np.tile(4, 4))
# In[56]:
# tiling a 1-D array repeats it end to end
x = np.array([0, 1, 2])
np.tile(x, 4)
# In[57]:
# tiling a 2-D array with a single count repeats it along the last axis
y = np.array([
    [1, 2],
    [3, 4],
])
np.tile(y, 3)
# ### Solve this puzzle to arrive at the following answer:
#
# $$x =
# \begin{bmatrix}
# 4 & 5 & 4 & 5 & 4 & 5 \\
# 6 & 7 & 6 & 7 & 6 & 7 \\
# 4 & 5 & 4 & 5 & 4 & 5 \\
# 6 & 7 & 6 & 7 & 6 & 7 \\
# \end{bmatrix}\tag{1}$$
#
# **Hint**: play with np.tile()
#
# ##### I will publish a full solution page later...
# ## Copies and views
#
# A slicing operation creates a view on the original array, which is just a way of accessing array data. Thus the original array is not copied in memory. You can use `np.may_share_memory()` to check if two arrays share the same memory block. Note however, that this uses heuristics and may give you false positives.
# In[58]:
# the integers 0 through 9: ten elements in total
a = np.arange(10)
a
# In[59]:
# a step of 2 takes every 2nd element: indices 0, 2, 4, 6, 8
b = a[::2]
b
# In[60]:
# check whether the slice and the original may share memory
np.may_share_memory(a, b)
# In[61]:
# overwrite the first element of the slice: 0 becomes 12
b[0] = 12
b
# In[62]:
# So what is a now? Look at its first element.
a
# In[63]:
# Start over, this time forcing an explicit copy of the slice
a = np.arange(10)
c = a[::2].copy()
c[0] = 12
a
# #### So, what happened?
#
# Let's do it step by step
# In[64]:
# start from a fresh array 0 .. 9
a = np.arange(10)
a
# In[65]:
# .copy() gives the slice its own memory
c = a[::2].copy()
c
# In[66]:
# changing the copy ...
c[0] = 12
c
# In[67]:
# ... leaves the original untouched
a
# #### The copy does not share a memory block with the original, so changing `c` cannot affect `a`
#
# In[68]:
# confirm it: compare a and c with may_share_memory
np.may_share_memory(a, c)
# ## Fancy Indexing
#
# NumPy arrays can be indexed with slices, but also with boolean or integer arrays **(masks)**. This method is called fancy indexing. It creates copies **not** views.
# In[69]:
# Seed the generator so the 15 random integers below are reproducible.
np.random.seed(3)
a = np.random.randint(0, 20, size=15)
a
# In[70]:
# an elementwise comparison produces a boolean array
(a % 3 == 0)
# In[71]:
# Keep that boolean array as a mask and index with it to extract the
# elements divisible by 3 as a new sub-array.
mask = a % 3 == 0
get_from_a = a[mask]
get_from_a
# In[72]:
# A mask on the left-hand side assigns a new value to just those elements
a[mask] = -1
a
# ## Indexing with array of integers
# In[73]:
# An integer index array may have any shape.
a = np.arange(10)
idx = np.array([
    [3, 4],
    [6, 7],
])
# In[74]:
idx.shape
# In[75]:
# the result takes the shape of the index array, not of a
a[idx]
# ## Here's another fun exercise to play with...
#
# This image shows various ways to index, play with these and come back with your own results 👇
#
#
#
# and your little exercise could be:
#
# ### Exercise 2 : Reproduce this fancy index
# # SOLUTIONS
#
# ## Exercise 1, Solutions 1, 2, 3
#
# **Hint**: use help(np.diag) for info.
#
# and also try out some more yourself!
#
# **NOTE**: As this Numpy lecture expands, the exercise portion will expand too and the solutions will be provided in another ipynb notebook later
# ### --------------------------------------------------------------------------------------------------------------###
# # Solutions and explanations to exercises
# ### --------------------------------------------------------------------------------------------------------------###
# ### Exercise 1: Hint (documentation for np.diag)
# In[76]:
help(np.diag)
# ### Exercise 1: Solution 1
# In[77]:
# Start from a 4x4 integer matrix of ones, then set both special cells with a
# single fancy-indexed assignment: (row 3, col 1) -> 6 and (row 2, col 3) -> 8.
a = np.ones((4, 4), dtype=int)
a[[3, 2], [1, 3]] = [6, 8]
# the same thing one element at a time: a[3, 1] = 6 followed by a[2, 3] = 8
print(a)
# ### Exercise 1: Solution 2
# In[78]:
# A 6x5 block of zeros whose rows 1..5 are overwritten by the 5x5 diagonal
# matrix of 7 .. 11; row 0 stays all zeros.
b = np.zeros((6, 5))
b[1:, :] = np.diag(np.arange(7, 12))
b
# ### Exercise 1: Solution 3
# In[79]:
# 6 rows by 5 columns, with the ones on the diagonal shifted two rows below the
# main one (k=-2).
y = np.eye(N=6, M=5, k=-2, dtype=float)
y
# In[ ]: